---
title: "Cleaning UN GDP per capita"
---

# Load

```{r}
# run the shared helper script (loads the packages used in this document)
  source("helper-packages.R")

# read the raw UN GDP per capita export (CSV downloaded from UNdata)
# NOTE(review): import() is presumably rio::import, attached by the helper script -- confirm
  un_raw <- import(
    "../raw-data/x-un-gdppc/UNdata_Export_20220531_004023543.csv"
  )
```

# Clean 

* Note: I use the GDP per capita estimate for mainland Tanzania, which does not include Zanzibar.

```{r}
# Build a country-year panel of UN GDP per capita plus a yearly above-median flag.
# Output columns: un_country_common, year, un_gdppd_2022_prices,
# un_gdppd_2022_prices_median_for_given_year,
# un_gdppd_2022_prices_above_median_for_given_year (1 = at or above median, 0 = below).
# NOTE(review): the `un_gdppd` prefix looks like a typo for "gdppc"; it is kept
# unchanged because other scripts may refer to these columns by name.
  un_clean <- 
    un_raw %>%
    # range of analysis data [first election to the last survey data]
    filter(Year %in% 1975:2020) %>%
    mutate(
      un_country_common = countryname(`Country or Area`),
      year = Year
    ) %>% 
    # drop rows whose name countryname() could not map (returned NA)
    filter(!is.na(un_country_common)) %>% 
    # there are duplicate observations in russia and ethiopia, 1990-1993;
    # get annual average so each country-year has exactly one row
    group_by(un_country_common, year) %>% 
      summarise(Value = mean(Value, na.rm = TRUE), .groups = "drop") %>%
    # yearly cross-country median; na.rm = TRUE so that a single country-year
    # without a usable value (mean of all-NA is NaN) does not turn the whole
    # year's median into NA and get every country dropped by na.omit() below
    group_by(year) %>% 
      mutate(
        un_gdppd_2022_prices_median_for_given_year = median(Value, na.rm = TRUE)
      ) %>% 
    ungroup() %>% 
    mutate(
      un_gdppd_2022_prices = Value,
      # numeric 1/0 indicator: country is at or above that year's median
      un_gdppd_2022_prices_above_median_for_given_year =
        as.numeric(un_gdppd_2022_prices >= un_gdppd_2022_prices_median_for_given_year)
      ) %>% 
    select(
      un_country_common, year, starts_with("un_gdppd")) %>%
    # remove any remaining country-years with a missing value
    na.omit()
```

# Save data

```{r}
  # write the cleaned country-year table to the cleaned-data folder as an RDS file
  saveRDS(
    object = un_clean,
    file = "../cleaned-data/x-7-un-gdppc.rds"
  )
```
